pacman::p_load(MetaStan, cmdstanr, dplyr, tidyr, rio, here, data.table,
posterior, ggdist, ggplot2, bayesplot)
# Set Stan options
options(mc.cores = parallel::detectCores())
color_scheme_set("red")Model Diagnostics and Posterior Predictive Check: Subgroups Model
Data
dat = import(here("data/raw_data.xlsx"))
setnames(dat, old=c("Int_Events","Control_Events"), new=c("r2", "r1"))
setnames(dat, old=c("Int_Total","Control_Total"), new=c("n2", "n1"))
J <- nrow(dat) # Number of studies (84)
y <- as.matrix(dat[, c("r1", "r2")]) # Events: control (r1), treatment (r2)
sample <- as.matrix(dat[, c("n1", "n2")]) # Sample sizes: control (n1), treatment (n2)
zero <- c(0, 0) # Zero vector for random effects mean
subgroup <- dat[, "Low_Dose"]
# Create the Stan data list
stan_data <- list(
J = J,
zero = zero,
y = y,
sample = sample,
subgroup = subgroup
)Model
# Compile the Stan model
zibglmm1 <- cmdstan_model(here("models/stan/zibglmm_subgroups_model1.stan"))csv_files <- list.files(here("models/storage"),
pattern = "^zibglmm_subgroups_model1.*\\.csv$",
full.names = TRUE)
if (length(csv_files) > 0 && all(file.exists(csv_files))) {
# Load the fit1 from CSV files
fit1 <- cmdstanr::as_cmdstan_fit(csv_files)
} else {
# fit1 the model if no CSV files are found
fit1 <- zibglmm1$sample(
data = stan_data,
chains = 4,
parallel_chains = 4,
iter_warmup = 2000,
iter_sampling = 2000,
adapt_delta = 0.99,
max_treedepth = 15,
seed = 123,
refresh = 0
)
# Save CSV files to persistent directory
fit1$save_output_files(dir = "models/storage")
}Diagnostics
draws_diagnostics <- fit1$draws(variables = c("mu", "sigma_nu", "pi", "beta"))
summary_df <- fit1$summary(variables = c("mu", "sigma_nu", "pi", "beta"))
print(summary_df[, c("variable", "ess_bulk", "ess_tail", "rhat")])# A tibble: 7 × 4
variable ess_bulk ess_tail rhat
<chr> <dbl> <dbl> <dbl>
1 mu[1] 3811. 4995. 1.00
2 mu[2] 3372. 4280. 1.00
3 sigma_nu[1] 6367. 5862. 1.00
4 sigma_nu[2] 4268. 4895. 1.00
5 pi 6017. 4859. 1.00
6 beta[1] 7461. 6591. 1.00
7 beta[2] 5817. 5792. 1.00
fit1$diagnostic_summary()$num_divergent
[1] 0 0 0 0
$num_max_treedepth
[1] 0 0 0 0
$ebfmi
[1] 0.9502290 0.9472499 0.9448076 0.9150427
Check for convergence and mixing of chains
mcmc_trace(draws_diagnostics, pars = c("mu[1]", "mu[2]", "sigma_nu[1]",
"sigma_nu[2]", "pi", "beta[1]", "beta[2]")) +
ggtitle("Trace Plots for Key Parameters")Check autocorrelation for key parameters
mcmc_acf(draws_diagnostics, pars = c("mu[1]", "mu[2]", "sigma_nu[1]",
"sigma_nu[2]", "pi", "beta[1]", "beta[2]"),
lags = 10) +
ggtitle("Autocorrelation")Posterior Predictive Check
A posterior predictive check (PPC) is a Bayesian method used to assess how well a statistical model fit1s the observed data by comparing it to data simulated from the model. It involves generating replicated datasets (\(y_{\text{rep}}\)) from the posterior predictive distribution, which combines the fit1ted model parameters with new random noise. Test statistics (e.g., means, variances, or proportions) are computed for both the observed data (\(y_{\text{obs}}\)) and the replicated data. By comparing these statistics—through visualizations (e.g., histograms) or numerical summaries (e.g., posterior p-values)—PPCs evaluate whether the model can reproduce key features of the data. If the observed statistics align with the replicated ones, the model is considered a good fit1; significant discrepancies suggest model misspecification.
Posterior predictive p-values (also called Bayesian p-values) are a useful way to quantify the fit1 of your model in a posterior predictive check (PPC). They measure the probability that a test statistic computed from the replicated data (y_rep) is more extreme than the same test statistic computed from the observed data (y_obs). A Bayesian p-value close to 0 or 1 indicates a poor fit1, while a value around 0.5 suggests the model captures the observed data well for that statistic.
For a test statistic \(T(y)\), the posterior predictive p-value is defined as:
\(p = P(T(y_{\text{rep}}) \geq T(y_{\text{obs}}) \mid y_{\text{obs}})\)
where \(y_{\text{rep}}\) is the replicated data from the posterior predictive distribution, and \(y_{\text{obs}}\) is the observed data.
# Extract observed and replicated data
y_obs <- as.matrix(dat[, c("r1", "r2")]) # Observed data (e.g., r1 for treatment, r2 for control)
y_rep <- fit1$draws("y_rep") # Posterior predictive samples
y_rep <- as_draws_matrix(y_rep) # Convert to matrix
J <- nrow(dat) # Number of studies
n_samples <- nrow(y_rep) # Number of posterior draws
# Extract subgroup covariate
subgroup <- dat$Low_Dose # Binary vector (0 or 1) for each study
# Extract model parameters for probability computation
draws1 <- fit1$draws(format = "df")
mu_control <- draws1$`mu[1]`
mu_treatment <- draws1$`mu[2]`
beta_control <- draws1$`beta[1]`
beta_treatment <- draws1$`beta[2]`
# Compute empirical probabilities
empirical_probabilities <- dat |>
reframe(Study = factor(1:nrow(dat)),
p1 = r1/n1,
p2 = r2/n2,
Subgroup = factor(subgroup, levels = c(0, 1), labels = c("Subgroup 0", "Subgroup 1")))1. Proportion of double zeros
# 1. PPC: Proportion of double zeros
double_zeros_obs_sub0 <- sum(y_obs[subgroup == 0, 1] == 0 & y_obs[subgroup == 0, 2] == 0) / sum(subgroup == 0)
double_zeros_obs_sub1 <- sum(y_obs[subgroup == 1, 1] == 0 & y_obs[subgroup == 1, 2] == 0) / sum(subgroup == 1)
double_zeros_rep_sub0 <- apply(y_rep, 1, function(y) {
y_mat <- matrix(y, nrow = J, ncol = 2)
sum(y_mat[subgroup == 0, 1] == 0 & y_mat[subgroup == 0, 2] == 0) / sum(subgroup == 0)
})
double_zeros_rep_sub1 <- apply(y_rep, 1, function(y) {
y_mat <- matrix(y, nrow = J, ncol = 2)
sum(y_mat[subgroup == 1, 1] == 0 & y_mat[subgroup == 1, 2] == 0) / sum(subgroup == 1)
})
double_zeros_rep_sub0 <- as.matrix(double_zeros_rep_sub0)
double_zeros_rep_sub1 <- as.matrix(double_zeros_rep_sub1)
p_value_double_zeros_sub0 <- mean(double_zeros_rep_sub0 >= double_zeros_obs_sub0)
p_value_double_zeros_sub1 <- mean(double_zeros_rep_sub1 >= double_zeros_obs_sub1)ppc_stat(double_zeros_obs_sub0, double_zeros_rep_sub0, stat = "identity",
binwidth = 0.03) +
ggtitle("PPC: Proportion of Double-Zero Studies (Subgroup 0)") +
labs(subtitle = paste("Posterior p-value =", round(p_value_double_zeros_sub0, 3))) +
xlab("Proportion of Double-Zero Studies")ppc_stat(double_zeros_obs_sub1, double_zeros_rep_sub1, stat = "identity") +
ggtitle("PPC: Proportion of Double-Zero Studies (Subgroup 1)") +
labs(subtitle = paste("Posterior p-value =", round(p_value_double_zeros_sub1, 3))) +
xlab("Proportion of Double-Zero Studies")`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
2. Mean event rates
mean_rate_control_obs_sub0 <- mean(y_obs[subgroup == 0, 1] / dat$n1[subgroup == 0])
mean_rate_control_obs_sub1 <- mean(y_obs[subgroup == 1, 1] / dat$n1[subgroup == 1])
mean_rate_treatment_obs_sub0 <- mean(y_obs[subgroup == 0, 2] / dat$n2[subgroup == 0])
mean_rate_treatment_obs_sub1 <- mean(y_obs[subgroup == 1, 2] / dat$n2[subgroup == 1])
mean_rate_control_rep_sub0 <- apply(y_rep, 1, function(y) {
y_mat <- matrix(y, nrow = J, ncol = 2)
mean(y_mat[subgroup == 0, 1] / dat$n1[subgroup == 0])
})
mean_rate_control_rep_sub1 <- apply(y_rep, 1, function(y) {
y_mat <- matrix(y, nrow = J, ncol = 2)
mean(y_mat[subgroup == 1, 1] / dat$n1[subgroup == 1])
})
mean_rate_treatment_rep_sub0 <- apply(y_rep, 1, function(y) {
y_mat <- matrix(y, nrow = J, ncol = 2)
mean(y_mat[subgroup == 0, 2] / dat$n2[subgroup == 0])
})
mean_rate_treatment_rep_sub1 <- apply(y_rep, 1, function(y) {
y_mat <- matrix(y, nrow = J, ncol = 2)
mean(y_mat[subgroup == 1, 2] / dat$n2[subgroup == 1])
})
mean_rate_control_rep_sub0 <- as.matrix(mean_rate_control_rep_sub0)
mean_rate_control_rep_sub1 <- as.matrix(mean_rate_control_rep_sub1)
mean_rate_treatment_rep_sub0 <- as.matrix(mean_rate_treatment_rep_sub0)
mean_rate_treatment_rep_sub1 <- as.matrix(mean_rate_treatment_rep_sub1)
p_value_mean_rate_control_sub0 <- mean(mean_rate_control_rep_sub0 >= mean_rate_control_obs_sub0)
p_value_mean_rate_control_sub1 <- mean(mean_rate_control_rep_sub1 >= mean_rate_control_obs_sub1)
p_value_mean_rate_treatment_sub0 <- mean(mean_rate_treatment_rep_sub0 >= mean_rate_treatment_obs_sub0)
p_value_mean_rate_treatment_sub1 <- mean(mean_rate_treatment_rep_sub1 >= mean_rate_treatment_obs_sub1)ppc_stat(mean_rate_control_obs_sub0, mean_rate_control_rep_sub0, stat = "identity") +
ggtitle("PPC: Mean Event Rate (Control, Subgroup 0)") +
labs(subtitle = paste("Posterior p-value =", round(p_value_mean_rate_control_sub0, 3))) +
xlab("Mean Event Rate (Control)")`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ppc_stat(mean_rate_control_obs_sub1, mean_rate_control_rep_sub1, stat = "identity") +
ggtitle("PPC: Mean Event Rate (Control, Subgroup 1)") +
labs(subtitle = paste("Posterior p-value =", round(p_value_mean_rate_control_sub1, 3))) +
xlab("Mean Event Rate (Control)")`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ppc_stat(mean_rate_treatment_obs_sub0, mean_rate_treatment_rep_sub0, stat = "identity") +
ggtitle("PPC: Mean Event Rate (Treatment, Subgroup 0)") +
labs(subtitle = paste("Posterior p-value =", round(p_value_mean_rate_treatment_sub0, 3))) +
xlab("Mean Event Rate (Treatment)")`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ppc_stat(mean_rate_treatment_obs_sub1, mean_rate_treatment_rep_sub1, stat = "identity") +
ggtitle("PPC: Mean Event Rate (Treatment, Subgroup 1)") +
labs(subtitle = paste("Posterior p-value =", round(p_value_mean_rate_treatment_sub1, 3))) +
xlab("Mean Event Rate (Treatment)")`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
3. Total event counts
total_events_control_obs_sub0 <- sum(y_obs[subgroup == 0, 1])
total_events_control_obs_sub1 <- sum(y_obs[subgroup == 1, 1])
total_events_treatment_obs_sub0 <- sum(y_obs[subgroup == 0, 2])
total_events_treatment_obs_sub1 <- sum(y_obs[subgroup == 1, 2])
total_events_control_rep_sub0 <- apply(y_rep, 1, function(y) {
y_mat <- matrix(y, nrow = J, ncol = 2)
sum(y_mat[subgroup == 0, 1])
})
total_events_control_rep_sub1 <- apply(y_rep, 1, function(y) {
y_mat <- matrix(y, nrow = J, ncol = 2)
sum(y_mat[subgroup == 1, 1])
})
total_events_treatment_rep_sub0 <- apply(y_rep, 1, function(y) {
y_mat <- matrix(y, nrow = J, ncol = 2)
sum(y_mat[subgroup == 0, 2])
})
total_events_treatment_rep_sub1 <- apply(y_rep, 1, function(y) {
y_mat <- matrix(y, nrow = J, ncol = 2)
sum(y_mat[subgroup == 1, 2])
})
total_events_control_rep_sub0 <- as.matrix(total_events_control_rep_sub0)
total_events_control_rep_sub1 <- as.matrix(total_events_control_rep_sub1)
total_events_treatment_rep_sub0 <- as.matrix(total_events_treatment_rep_sub0)
total_events_treatment_rep_sub1 <- as.matrix(total_events_treatment_rep_sub1)
p_value_total_control_sub0 <- mean(total_events_control_rep_sub0 >= total_events_control_obs_sub0)
p_value_total_control_sub1 <- mean(total_events_control_rep_sub1 >= total_events_control_obs_sub1)
p_value_total_treatment_sub0 <- mean(total_events_treatment_rep_sub0 >= total_events_treatment_obs_sub0)
p_value_total_treatment_sub1 <- mean(total_events_treatment_rep_sub1 >= total_events_treatment_obs_sub1)ppc_stat(total_events_control_obs_sub0, total_events_control_rep_sub0, stat = "identity") +
ggtitle("PPC: Total Events (Control, Subgroup 0)") +
labs(subtitle = paste("Posterior p-value =", round(p_value_total_control_sub0, 3))) +
xlab("Total Events in Control Group")`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ppc_stat(total_events_control_obs_sub1, total_events_control_rep_sub1, stat = "identity") +
ggtitle("PPC: Total Events (Control, Subgroup 1)") +
labs(subtitle = paste("Posterior p-value =", round(p_value_total_control_sub1, 3))) +
xlab("Total Events in Control Group")`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ppc_stat(total_events_treatment_obs_sub0, total_events_treatment_rep_sub0, stat = "identity") +
ggtitle("PPC: Total Events (Treatment, Subgroup 0)") +
labs(subtitle = paste("Posterior p-value =", round(p_value_total_treatment_sub0, 3))) +
xlab("Total Events in Treatment Group")`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ppc_stat(total_events_treatment_obs_sub1, total_events_treatment_rep_sub1, stat = "identity") +
ggtitle("PPC: Total Events (Treatment, Subgroup 1)") +
labs(subtitle = paste("Posterior p-value =", round(p_value_total_treatment_sub1, 3))) +
xlab("Total Events in Treatment Group")`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
4. Proportion of zero events per arm
prop_zeros_control_obs_sub0 <- mean(y_obs[subgroup == 0, 1] == 0)
prop_zeros_control_obs_sub1 <- mean(y_obs[subgroup == 1, 1] == 0)
prop_zeros_treatment_obs_sub0 <- mean(y_obs[subgroup == 0, 2] == 0)
prop_zeros_treatment_obs_sub1 <- mean(y_obs[subgroup == 1, 2] == 0)
prop_zeros_control_rep_sub0 <- apply(y_rep, 1, function(y) {
y_mat <- matrix(y, nrow = J, ncol = 2)
mean(y_mat[subgroup == 0, 1] == 0)
})
prop_zeros_control_rep_sub1 <- apply(y_rep, 1, function(y) {
y_mat <- matrix(y, nrow = J, ncol = 2)
mean(y_mat[subgroup == 1, 1] == 0)
})
prop_zeros_treatment_rep_sub0 <- apply(y_rep, 1, function(y) {
y_mat <- matrix(y, nrow = J, ncol = 2)
mean(y_mat[subgroup == 0, 2] == 0)
})
prop_zeros_treatment_rep_sub1 <- apply(y_rep, 1, function(y) {
y_mat <- matrix(y, nrow = J, ncol = 2)
mean(y_mat[subgroup == 1, 2] == 0)
})
prop_zeros_control_rep_sub0 <- as.matrix(prop_zeros_control_rep_sub0)
prop_zeros_control_rep_sub1 <- as.matrix(prop_zeros_control_rep_sub1)
prop_zeros_treatment_rep_sub0 <- as.matrix(prop_zeros_treatment_rep_sub0)
prop_zeros_treatment_rep_sub1 <- as.matrix(prop_zeros_treatment_rep_sub1)
p_value_prop_zeros_control_sub0 <- mean(prop_zeros_control_rep_sub0 >= prop_zeros_control_obs_sub0)
p_value_prop_zeros_control_sub1 <- mean(prop_zeros_control_rep_sub1 >= prop_zeros_control_obs_sub1)
p_value_prop_zeros_treatment_sub0 <- mean(prop_zeros_treatment_rep_sub0 >= prop_zeros_treatment_obs_sub0)
p_value_prop_zeros_treatment_sub1 <- mean(prop_zeros_treatment_rep_sub1 >= prop_zeros_treatment_obs_sub1)ppc_stat(prop_zeros_control_obs_sub0, prop_zeros_control_rep_sub0, stat = "identity",
binwidth = 0.03) +
ggtitle("PPC: Proportion of Zero Events (Control, Subgroup 0)") +
labs(subtitle = paste("Posterior p-value =", round(p_value_prop_zeros_control_sub0, 3))) +
xlab("Proportion of Zero Events in Control Group")ppc_stat(prop_zeros_control_obs_sub1, prop_zeros_control_rep_sub1, stat = "identity",
binwidth = 0.02) +
ggtitle("PPC: Proportion of Zero Events (Control, Subgroup 1)") +
labs(subtitle = paste("Posterior p-value =", round(p_value_prop_zeros_control_sub1, 3))) +
xlab("Proportion of Zero Events in Control Group")ppc_stat(prop_zeros_treatment_obs_sub0, prop_zeros_treatment_rep_sub0, stat = "identity",
binwidth = 0.03) +
ggtitle("PPC: Proportion of Zero Events (Treatment, Subgroup 0)") +
labs(subtitle = paste("Posterior p-value =", round(p_value_prop_zeros_treatment_sub0, 3))) +
xlab("Proportion of Zero Events in Treatment Group")ppc_stat(prop_zeros_treatment_obs_sub1, prop_zeros_treatment_rep_sub1, stat = "identity",
binwidth = 0.02) +
ggtitle("PPC: Proportion of Zero Events (Treatment, Subgroup 1)") +
labs(subtitle = paste("Posterior p-value =", round(p_value_prop_zeros_treatment_sub1, 3))) +
xlab("Proportion of Zero Events in Treatment Group")5. Variance of event rates
var_rate_control_obs_sub0 <- var(y_obs[subgroup == 0, 1] / dat$n1[subgroup == 0])
var_rate_control_obs_sub1 <- var(y_obs[subgroup == 1, 1] / dat$n1[subgroup == 1])
var_rate_treatment_obs_sub0 <- var(y_obs[subgroup == 0, 2] / dat$n2[subgroup == 0])
var_rate_treatment_obs_sub1 <- var(y_obs[subgroup == 1, 2] / dat$n2[subgroup == 1])
var_rate_control_rep_sub0 <- apply(y_rep, 1, function(y) {
y_mat <- matrix(y, nrow = J, ncol = 2)
var(y_mat[subgroup == 0, 1] / dat$n1[subgroup == 0])
})
var_rate_control_rep_sub1 <- apply(y_rep, 1, function(y) {
y_mat <- matrix(y, nrow = J, ncol = 2)
var(y_mat[subgroup == 1, 1] / dat$n1[subgroup == 1])
})
var_rate_treatment_rep_sub0 <- apply(y_rep, 1, function(y) {
y_mat <- matrix(y, nrow = J, ncol = 2)
var(y_mat[subgroup == 0, 2] / dat$n2[subgroup == 0])
})
var_rate_treatment_rep_sub1 <- apply(y_rep, 1, function(y) {
y_mat <- matrix(y, nrow = J, ncol = 2)
var(y_mat[subgroup == 1, 2] / dat$n2[subgroup == 1])
})
var_rate_control_rep_sub0 <- as.matrix(var_rate_control_rep_sub0)
var_rate_control_rep_sub1 <- as.matrix(var_rate_control_rep_sub1)
var_rate_treatment_rep_sub0 <- as.matrix(var_rate_treatment_rep_sub0)
var_rate_treatment_rep_sub1 <- as.matrix(var_rate_treatment_rep_sub1)
p_value_var_rate_control_sub0 <- mean(var_rate_control_rep_sub0 >= var_rate_control_obs_sub0)
p_value_var_rate_control_sub1 <- mean(var_rate_control_rep_sub1 >= var_rate_control_obs_sub1)
p_value_var_rate_treatment_sub0 <- mean(var_rate_treatment_rep_sub0 >= var_rate_treatment_obs_sub0)
p_value_var_rate_treatment_sub1 <- mean(var_rate_treatment_rep_sub1 >= var_rate_treatment_obs_sub1)ppc_stat(var_rate_control_obs_sub0, var_rate_control_rep_sub0, stat = "identity") +
ggtitle("PPC: Variance of Event Rates (Control, Subgroup 0)") +
labs(subtitle = paste("Posterior p-value =", round(p_value_var_rate_control_sub0, 3))) +
xlab("Variance of Event Rates in Control Group")`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ppc_stat(var_rate_control_obs_sub1, var_rate_control_rep_sub1, stat = "identity") +
ggtitle("PPC: Variance of Event Rates (Control, Subgroup 1)") +
labs(subtitle = paste("Posterior p-value =", round(p_value_var_rate_control_sub1, 3))) +
xlab("Variance of Event Rates in Control Group")`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ppc_stat(var_rate_treatment_obs_sub0, var_rate_treatment_rep_sub0, stat = "identity") +
ggtitle("PPC: Variance of Event Rates (Treatment, Subgroup 0)") +
labs(subtitle = paste("Posterior p-value =", round(p_value_var_rate_treatment_sub0, 3))) +
xlab("Variance of Event Rates in Treatment Group")`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ppc_stat(var_rate_treatment_obs_sub1, var_rate_treatment_rep_sub1, stat = "identity") +
ggtitle("PPC: Variance of Event Rates (Treatment, Subgroup 1)") +
labs(subtitle = paste("Posterior p-value =", round(p_value_var_rate_treatment_sub1, 3))) +
xlab("Variance of Event Rates in Treatment Group")`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
6. Correlation between Control and Treatment Event Proportions
# Subgroup 0: Low_Dose == 0
sub0_idx <- which(subgroup == 0)
# Observed correlation for subgroup 0
p_control_obs_sub0 <- y_obs[sub0_idx, 1] / dat$n1[sub0_idx]
p_treatment_obs_sub0 <- y_obs[sub0_idx, 2] / dat$n2[sub0_idx]
corr_obs_sub0 <- cor(p_control_obs_sub0, p_treatment_obs_sub0)
# Replicated correlations for subgroup 0
corr_rep_sub0 <- apply(y_rep, 1, function(y) {
y_mat <- matrix(y, nrow = J, ncol = 2)
p_control_rep <- y_mat[sub0_idx, 1] / dat$n1[sub0_idx]
p_treatment_rep <- y_mat[sub0_idx, 2] / dat$n2[sub0_idx]
cor(p_control_rep, p_treatment_rep)
})Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
# Convert to matrix for ppc_stat
corr_rep_sub0 <- as.matrix(corr_rep_sub0) |> na.omit()
# Compute posterior p-value for subgroup 0
p_value_corr_sub0 <- mean(corr_rep_sub0 >= corr_obs_sub0)
# Generate PPC plot for subgroup 0
ppc_stat(corr_obs_sub0, corr_rep_sub0, stat = "identity") +
ggtitle("PPC: Correlation (Control vs. Treatment, Subgroup 0)") +
labs(subtitle = paste("Posterior p-value =", round(p_value_corr_sub0, 3))) +
xlab("Correlation")`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Subgroup 1: Low_Dose == 1
sub1_idx <- which(subgroup == 1)
# Observed correlation for subgroup 1
p_control_obs_sub1 <- y_obs[sub1_idx, 1] / dat$n1[sub1_idx]
p_treatment_obs_sub1 <- y_obs[sub1_idx, 2] / dat$n2[sub1_idx]
corr_obs_sub1 <- cor(p_control_obs_sub1, p_treatment_obs_sub1)
# Replicated correlations for subgroup 1
corr_rep_sub1 <- apply(y_rep, 1, function(y) {
y_mat <- matrix(y, nrow = J, ncol = 2)
p_control_rep <- y_mat[sub1_idx, 1] / dat$n1[sub1_idx]
p_treatment_rep <- y_mat[sub1_idx, 2] / dat$n2[sub1_idx]
cor(p_control_rep, p_treatment_rep)
})Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
Warning in cor(p_control_rep, p_treatment_rep): the standard deviation is zero
# Convert to matrix for ppc_stat
corr_rep_sub1 <- as.matrix(corr_rep_sub1) |> na.omit()
# Compute posterior p-value for subgroup 1
p_value_corr_sub1 <- mean(corr_rep_sub1 >= corr_obs_sub1)
# Generate PPC plot for subgroup 1
ppc_stat(corr_obs_sub1, corr_rep_sub1, stat = "identity") +
ggtitle("PPC: Correlation (Control vs. Treatment, Subgroup 1)") +
labs(subtitle = paste("Posterior p-value =", round(p_value_corr_sub1, 3))) +
xlab("Correlation")`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
7. Predicted Study-Specific Control/Treatment Probabiltities
# Compute study-specific probabilities
p_control_n <- matrix(NA, n_samples, J)
p_treatment_n <- matrix(NA, n_samples, J)
for (j in 1:J) {
nu_control_j <- draws1[[paste0("nu[", j, ",1]")]]
nu_treatment_j <- draws1[[paste0("nu[", j, ",2]")]]
# Adjust fixed effects for subgroup
p_control_n[, j] <- plogis(mu_control + beta_control * subgroup[j] + nu_control_j)
p_treatment_n[, j] <- plogis(mu_treatment + beta_treatment * subgroup[j] + nu_treatment_j)
}
control_long <- data.frame(
Probability = as.vector(p_control_n),
Study = factor(rep(1:J, each = n_samples)),
Group = "Control",
Subgroup = factor(rep(subgroup, each = n_samples), levels = c(0, 1), labels = c("Subgroup 0", "Subgroup 1"))
)
treatment_long <- data.frame(
Probability = as.vector(p_treatment_n),
Study = factor(rep(1:J, each = n_samples)),
Group = "Treatment",
Subgroup = factor(rep(subgroup, each = n_samples), levels = c(0, 1), labels = c("Subgroup 0", "Subgroup 1"))
)
# Combine data for unified visualization
data_long <- rbind(control_long, treatment_long)ggplot(control_long, aes(x = Probability, group = Study)) +
geom_density(alpha = 0.01, linewidth = 0.03) +
facet_grid(Group ~ Subgroup, scales = "free_y") +
labs(title = "Density of Predicted Study-Specific Control Probabilities",
x = "Study-Specific Probability",
y = "Density") +
theme_minimal() ggplot(treatment_long, aes(x = Probability, group = Study)) +
geom_density(alpha = 0.01, linewidth = 0.03) +
facet_grid(Group ~ Subgroup, scales = "free_y") +
labs(title = "Density of Predicted Study-Specific Treatment Probabilities",
x = "Probability",
y = "Density") +
theme_minimal() This plots shows the predicted probability density for each study as heatmaps. Each row represents a study in our sample. Red points depict the observed study-specific probability.
# Heatmap with empirical probabilities and subgroup faceting
ggplot(control_long, aes(x = Probability, y = Study)) +
geom_bin2d(binwidth = c(0.02, 1)) +
scale_fill_gradient(low = "white", high = "blue", name = "Density") +
geom_point(data = empirical_probabilities, aes(x = p1, y = Study), color = "red") +
facet_grid(Group ~ Subgroup, scales = "free_y") +
labs(title = "Heatmap of Predicted Study-Specific Control Probabilities",
x = "Probability",
y = "Study",
fill = "Density") +
theme_minimal() +
theme(strip.text = element_text(size = 12, face = "bold"))ggplot(treatment_long, aes(x = Probability, y = Study)) +
geom_bin2d(binwidth = c(0.02, 1)) +
scale_fill_gradient(low = "white", high = "blue", name = "Density") +
geom_point(data = empirical_probabilities, aes(x = p2, y = Study), color = "red") +
facet_grid(Group ~ Subgroup, scales = "free_y") +
labs(title = "Heatmap of Predicted Study-Specific Treatment Probabilities",
x = "Probability",
y = "Study",
fill = "Density") +
theme_minimal() +
theme(strip.text = element_text(size = 12, face = "bold"))